# importing the dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the Kliesen & Tatom Stata dataset and preview the first rows.
df = pd.read_stata('Kliesen2013.dta')
df.head()


# Index the frame by (MANValAdded, year) and preview the result.
df = df.set_index(['MANValAdded', 'year'])
df.head(5)


# Turn MANValAdded and year back into regular columns so they can be used for
# the graphs; after this round trip they are the two leading columns.
df = df.reset_index()


from matplotlib.pyplot import figure

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever name this install provides.
white_style = ('seaborn-v0_8-white'
               if 'seaborn-v0_8-white' in plt.style.available
               else 'seaborn-white')
plt.style.use(white_style)

# Nominal manufacturing value added against year, drawn on an ivory background.
figure(num=None, figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')
plt.rcParams['axes.facecolor'] = 'ivory'
plt.plot(df['year'], df['MANValAdded'], linewidth=4, color='firebrick', alpha=.85)
plt.title('Manufacturing over time')
plt.xlabel('year', fontsize=12)
plt.xlim(1950, 2011)

(1950, 2011)


# Deflate nominal manufacturing value added by the manufacturing price index.
nominal_value_added = df['MANValAdded']
price_index = df['MANPriceIndex']
df['RealMAN'] = nominal_value_added / price_index
df.tail(10)


# Side-by-side comparison of nominal and real manufacturing.
figure(num=None, figsize=(10, 4), dpi=80, facecolor='w', edgecolor='k')
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever name this install provides.
white_style = ('seaborn-v0_8-white'
               if 'seaborn-v0_8-white' in plt.style.available
               else 'seaborn-white')
plt.style.use(white_style)
plt.rcParams['axes.facecolor'] = 'ivory'

# Left panel: nominal value added.
plt.subplot(1, 2, 1)
plt.plot(df['year'], df['MANValAdded'], linewidth=4, color='firebrick', alpha=.85)
plt.title('U.S. Manufacturing over time', fontsize=14)
plt.xlabel('year', fontsize=12)
plt.xlim(1950, 2011)

# Right panel: value added deflated by the manufacturing price index.
plt.subplot(1, 2, 2)
plt.plot(df['year'], df['RealMAN'], linewidth=4, color='rebeccapurple', alpha=.85)
plt.title('The Real Manufacturing over time', fontsize=14)
plt.xlabel('year', fontsize=12)
plt.xlim(1950, 2011)
plt.tight_layout()


# Natural log of real manufacturing, stored as a new column.
log_real_manufacturing = np.log(df['RealMAN'])
df['ln_realMAN'] = log_real_manufacturing
df.head()


# Three panels in one row: nominal, real, and log-real manufacturing.
figure(num=None, figsize=(12, 3.5), dpi=80, facecolor='w', edgecolor='k')
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever name this install provides.
white_style = ('seaborn-v0_8-white'
               if 'seaborn-v0_8-white' in plt.style.available
               else 'seaborn-white')
plt.style.use(white_style)
plt.rcParams['axes.facecolor'] = 'ivory'

# (column, line colour, panel title) for each panel, left to right.
# NOTE(review): the third panel plots ln_realMAN (the log level), while its title
# reads '% Δ' — confirm the intended wording.
panels = [
    ('MANValAdded', 'firebrick', 'The Nominal Manufacturing'),
    ('RealMAN', 'rebeccapurple', 'The Real Manufacturing'),
    ('ln_realMAN', 'darkcyan', '% Δ in the Real Manufacturing'),
]

for position, (column, colour, panel_title) in enumerate(panels, start=1):
    ax = plt.subplot(1, 3, position)
    plt.plot(df['year'], df[column], linewidth=4, color=colour, alpha=.85)
    plt.title(panel_title, fontsize=14)
    plt.xlabel('year', fontsize=12)
    plt.xlim(1950, 2011)

# Run the layout pass only after all three panels exist so every title and
# label is taken into account.
plt.tight_layout()

(1950, 2011)


import statsmodels.formula.api as smf

# Linear time trend: regress log real manufacturing on the calendar year.
trend_spec = smf.ols(formula='ln_realMAN ~ year', data=df)
reg0 = trend_spec.fit()
print(reg0.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:             ln_realMAN   R-squared:                       0.987
Model:                            OLS   Adj. R-squared:                  0.987
Method:                 Least Squares   F-statistic:                     4612.
Date:                Mon, 06 Jan 2020   Prob (F-statistic):           1.87e-58
Time:                        20:04:09   Log-Likelihood:                 82.003
No. Observations:                  62   AIC:                            -160.0
Df Residuals:                      60   BIC:                            -155.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -60.6225      0.921    -65.813      0.000     -62.465     -58.780
year           0.0316      0.000     67.914      0.000       0.031       0.033
==============================================================================
Omnibus:                        1.652   Durbin-Watson:                   0.689
Prob(Omnibus):                  0.438   Jarque-Bera (JB):                1.475
Skew:                          -0.372   Prob(JB):                        0.478
Kurtosis:                       2.863   Cond. No.                     2.19e+05
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.19e+05. This might indicate that there are
strong multicollinearity or other numerical problems.


# import the module
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the log of real manufacturing.
X = df['ln_realMAN']
result = adfuller(X)

# The code reads the statistic, the p-value and the critical-value dict
# from positions 0, 1 and 4 of the returned tuple.
adf_statistic, p_value, critical_values = result[0], result[1], result[4]
print(f'ADF Statistic: {adf_statistic:f}')
print(f'p-value: {p_value:f}')
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'\t{level}: {threshold:.3f}')

ADF Statistic: -0.700441
p-value: 0.846635
Critical Values:
	1%: -3.546
	5%: -2.912
	10%: -2.594


from sklearn.linear_model import LinearRegression
from scipy.stats import ttest_ind

# Regressor and target are kept as one-column frames for the sklearn fit.
x = df[['year']]
y = df[['ln_realMAN']]

# Fit the trend line and evaluate it on the same years (yhat is the prediction line).
reg = LinearRegression().fit(x, y)
yhat = reg.predict(x)

# Residuals: actual values minus the fitted values.
res = y - yhat
print(res)

    ln_realMAN
0    -0.058748
1     0.016875
2     0.018542
3     0.055137
4    -0.048252
..         ...
57    0.058577
58   -0.033209
59   -0.163531
60   -0.088720
61   -0.078443

[62 rows x 1 columns]


import seaborn as sns

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever name this install provides.
white_style = ('seaborn-v0_8-white'
               if 'seaborn-v0_8-white' in plt.style.available
               else 'seaborn-white')
plt.style.use(white_style)
figure(num=None, figsize=(7, 4.5), dpi=80, facecolor='w', edgecolor='k')
plt.rcParams['axes.facecolor'] = 'ivory'

# Observed log real manufacturing with the fitted trend line (yhat comes from
# the sklearn fit earlier in the file).
plt.scatter(df['year'], df['ln_realMAN'], color='darkcyan', alpha=.85, marker='o', s=60)
plt.plot(df['year'], yhat, linewidth=3, color='black', alpha=.5)

# seaborn >= 0.12 only accepts x and y as keywords, so pass them by name.
# Plain arrays are used so seaborn has no series names to write onto the axes.
sns.residplot(x=df['year'].to_numpy(), y=df['ln_realMAN'].to_numpy(), color="darkorange")
plt.title('% Δ in the Real Manufacturing & Its Residuals', fontsize=14)
plt.xlabel('year', fontsize=14)
plt.xlim(1950, 2011)
plt.annotate('Residuals', (1986, .2), fontsize=12, color='darkorange', alpha=.85)
plt.annotate('Fitted Line', (1972, 2.1), fontsize=12, color='black', alpha=.5)
plt.annotate('Real MANF', (1997, 2.2), fontsize=12, color='darkcyan', alpha=.85)

Text(1997, 2.2, 'Real MANF')


# We regress manufacturing production on its own lagged values.
# First build the lag-1 series. Shifting leaves a NaN in the first row; it is
# kept so the column stays aligned with df. (The previous bare `.dropna`
# attribute access never called the method and discarded the result, so it
# had no effect and has been removed.)
df['ln_realMAN_l1'] = df['ln_realMAN'].shift(1)
df['ln_realMAN_l1']

0          NaN
1     0.911046
2     1.018254
3     1.051508
4     1.119689
        ...   
57    2.794005
58    2.828762
59    2.768561
60    2.669824
61    2.776222
Name: ln_realMAN_l1, Length: 62, dtype: float64


# AR(1)-style regression: log real manufacturing on its first lag.
# Print the full summary to inspect the coefficient and adjusted R^2.
lag1_spec = smf.ols(formula='ln_realMAN ~ ln_realMAN_l1', data=df)
reg1 = lag1_spec.fit()
print(reg1.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:             ln_realMAN   R-squared:                       0.991
Model:                            OLS   Adj. R-squared:                  0.991
Method:                 Least Squares   F-statistic:                     6351.
Date:                Mon, 06 Jan 2020   Prob (F-statistic):           9.02e-62
Time:                        20:04:10   Log-Likelihood:                 91.997
No. Observations:                  61   AIC:                            -180.0
Df Residuals:                      59   BIC:                            -175.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
=================================================================================
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept         0.0533      0.025      2.151      0.036       0.004       0.103
ln_realMAN_l1     0.9885      0.012     79.690      0.000       0.964       1.013
==============================================================================
Omnibus:                        6.116   Durbin-Watson:                   2.103
Prob(Omnibus):                  0.047   Jarque-Bera (JB):                6.232
Skew:                          -0.763   Prob(JB):                       0.0443
Kurtosis:                       2.647   Cond. No.                         8.78
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# Build the lag(2) and lag(3) columns used by the richer specifications.
for lag in (2, 3):
    df[f'ln_realMAN_l{lag}'] = df['ln_realMAN'].shift(lag)

# Model with lag(1) and lag(2); the fit is kept for the summary table.
two_lag_formula = 'ln_realMAN ~ ln_realMAN_l1 + ln_realMAN_l2'
reg2 = smf.ols(two_lag_formula, data=df).fit()

# Model with lag(1), lag(2) and lag(3); the fit is kept for the summary table.
three_lag_formula = 'ln_realMAN ~ ln_realMAN_l1 + ln_realMAN_l2 + ln_realMAN_l3'
reg3 = smf.ols(three_lag_formula, data=df).fit()


# We instal package needed to produce the table with all the set of independent variables
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from linearmodels.iv import IV2SLS


# Extra rows appended under the coefficients: R-squared and sample size per model.
info_dict = {
    'R-squared': lambda fit: f"{fit.rsquared:.2f}",
    'No. observations': lambda fit: f"{int(fit.nobs):d}",
}

# Put the three lag specifications side by side in one table.
fitted_models = [reg1, reg2, reg3]
column_labels = [f'Model {number}' for number in range(1, len(fitted_models) + 1)]

results_table = summary_col(
    results=fitted_models,
    float_format='%0.2f',
    stars=True,
    model_names=column_labels,
    info_dict=info_dict,
)
results_table.add_title('Table 1 - OLS Regressions')

print(results_table)

       Table 1 - OLS Regressions
========================================
                 Model 1 Model 2 Model 3
----------------------------------------
Intercept        0.05**  0.05*   0.06*  
                 (0.02)  (0.03)  (0.03) 
ln_realMAN_l1    0.99*** 0.92*** 0.91***
                 (0.01)  (0.13)  (0.13) 
ln_realMAN_l2            0.07    -0.08  
                         (0.13)  (0.18) 
ln_realMAN_l3                    0.16   
                                 (0.13) 
R-squared        0.99    0.99    0.99   
No. observations 61      60      59     
========================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


# Autocorrelation (left) and partial autocorrelation (right) of log real
# manufacturing, skipping the first row, for lags up to 10.
fig, axes = plt.subplots(1, 2, figsize=(15, 4))
acf_axis, pacf_axis = axes

log_series = df['ln_realMAN'].iloc[1:]
fig = sm.graphics.tsa.plot_acf(log_series, lags=10, ax=acf_axis)
fig = sm.graphics.tsa.plot_pacf(log_series, lags=10, ax=pacf_axis)


# (rate column, level column, lagged-level column) for each series.
rate_specs = (
    ('MANRate', 'MANRealInd', 'MANRealInd_l1'),
    ('EXPRate', 'EXPReal', 'EXPReal_1'),
    ('IMPRate', 'IMPReal', 'IMPReal_1'),
)

# First add every one-period lag, then every rate, so the new columns are
# appended in the same order as before (lags first, rates after).
for _, level_col, lag_col in rate_specs:
    df[lag_col] = df[level_col].shift()

# Rate = (current level - previous level) / previous level.
for rate_col, level_col, lag_col in rate_specs:
    df[rate_col] = (df[level_col] - df[lag_col]) / df[lag_col]

df.tail()


# now run the regression of manufacturing growth on export growth
reg = LinearRegression()

# Drop missing values jointly so every x row is paired with the y row of the
# same year. Dropping each column on its own can leave the two frames with
# different rows (or lengths) whenever their NaN patterns differ.
exp_sample = df[['EXPRate', 'MANRate']].dropna()
x = exp_sample[['EXPRate']]
y = exp_sample[['MANRate']]

# fitting the model
reg = reg.fit(x, y)

# creating yhat or the prediction line
yhat = reg.predict(x)


plt.scatter(x, y, color='slateblue', alpha=.8, marker='o', s=60)
plt.plot(x, yhat, linewidth=2, color='black', alpha=.7)
plt.xlabel('Δ in EXPORTS', fontsize=12)
plt.ylabel('Δ in MANUFACTURING', fontsize=12)
plt.show()


# now run the regression of manufacturing growth on import growth
reg = LinearRegression()

# Build x and y together from one jointly filtered sample instead of reusing
# the y left over from the exports regression: this keeps every import-growth
# row paired with the manufacturing-growth row of the same year.
imp_sample = df[['IMPRate', 'MANRate']].dropna()
x = imp_sample[['IMPRate']]
y = imp_sample[['MANRate']]

# fitting the model
reg = reg.fit(x, y)

# creating yhat or the prediction line
yhat = reg.predict(x)


plt.scatter(x, y, color='magenta', alpha=.8, marker='o', s=60)
plt.plot(x, yhat, linewidth=2, color='black', alpha=.7)
plt.xlabel('Δ in IMPORTS', fontsize=12)
plt.ylabel('Δ in MANUFACTURING', fontsize=12)
plt.show()


# slope of manufacturing growth on import growth
print(reg.coef_)

[[0.59601921]]

Variable	Year
ExptoChina	Nominal values of U.S. export to China
ExptoWorld	Nominal value of total U.S. export
MANValAdded	Manufacturing value added. Nominal value.
MANPriceIndex	Manufacturing price index (2005=100)
GDPPriceIndex	GDP price index (2005=100)
GoodsEXP2005	U.S. export of goods, billion dollars, real (2005 chained price index)
GoodsIMP2005	U.S. import of goods, billion dollars, real (2005 chained price index)
USGDPReal	U.S. real GDP
MANRealInd	U.S. Manufacturing production, indexed (2005=100)
DollarNom	Nominal value of dollar (trade-weighted)
DollarReal	Real value of dollar (trade-weighted)
OilNom	Nominal oil price (Refiners’ acquisitions price)
FuelReal	Real price of fuel
EquipSofInvR	Equipment and software fixed private investment
EXPReal	Real goods export
IMPReal	Real goods import

	MANValAdded	year	ExptoChina	ExptoWorld	MANPriceIndex	GDPPriceIndex	GoodsEXP2005	GoodsIMP2005	USGDPReal	MANRealInd	DollarNom	DollarReal	OilNom	FuelReal	EquipSofInvR	EXPReal	IMPReal	RealMAN
52	1355.5	2002	22127.700000	693101.4	99.736	92.192	762.7	1372.2	11553.0	87.6	126.82	110.11	24.02	93.2	93.089	100.119241	830.3	13.590880
53	1374.3	2003	28367.942859	724771.0	98.015	94.134	776.4	1439.9	11840.7	88.7	119.27	103.45	28.60	112.9	94.413	119.580990	851.4	14.021323
54	1482.7	2004	34427.772456	814874.7	97.745	96.784	842.6	1599.3	12263.8	91.1	113.76	98.82	36.91	126.9	96.852	131.024656	917.3	15.169062
55	1569.3	2005	41192.010123	901081.8	100.000	100.000	906.1	1708.0	12638.4	94.8	110.84	97.18	50.32	156.4	100.000	156.400000	995.6	15.693000
56	1648.4	2006	53673.008343	1025967.5	100.842	103.237	991.5	1809.1	12976.2	97.1	108.71	96.07	60.10	166.7	102.927	161.959447	1069.6	16.346364
57	1698.0	2007	62936.891576	1148198.7	100.328	106.231	1088.1	1856.1	13228.9	100.0	103.58	91.48	67.98	177.6	105.654	168.095860	1109.0	16.924488
58	1628.5	2008	69732.837543	1287442.0	102.192	108.565	1157.0	1784.8	13228.8	95.0	99.90	87.65	94.29	214.6	107.563	199.510984	1082.0	15.935690
59	1540.2	2009	69496.678611	1056043.0	106.681	109.732	1018.6	1506.0	12880.6	82.2	105.70	91.25	59.20	158.7	108.119	146.782712	916.3	14.437435
60	1701.9	2010	91880.613079	1278263.2	105.983	111.000	1164.9	1729.3	13248.2	86.6	102.07	87.18	76.70	185.8	109.193	170.157428	1056.1	16.058236
61	1837.0	2011	103878.600000	1480646.0	109.706	113.338	1251.7	1828.6	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	16.744754

	MANValAdded	year	ExptoChina	ExptoWorld	MANPriceIndex	GDPPriceIndex	GoodsEXP2005	GoodsIMP2005	USGDPReal	MANRealInd	...	ln_realMAN	ln_realMAN_l1	ln_realMAN_l2	ln_realMAN_l3	MANRealInd_l1	EXPReal_1	IMPReal_1	MANRate	EXPRate	IMPRate
57	1698.0	2007	62936.891576	1148198.7	100.328	106.231	1088.1	1856.1	13228.9	100.0	...	2.828762	2.794005	2.753215	2.719258	97.1	161.959447	1069.6	0.029866	0.037889	0.036836
58	1628.5	2008	69732.837543	1287442.0	102.192	108.565	1157.0	1784.8	13228.8	95.0	...	2.768561	2.828762	2.794005	2.753215	100.0	168.095860	1109.0	-0.050000	0.186888	-0.024346
59	1540.2	2009	69496.678611	1056043.0	106.681	109.732	1018.6	1506.0	12880.6	82.2	...	2.669824	2.768561	2.828762	2.794005	95.0	199.510984	1082.0	-0.134737	-0.264288	-0.153142
60	1701.9	2010	91880.613079	1278263.2	105.983	111.000	1164.9	1729.3	13248.2	86.6	...	2.776222	2.669824	2.768561	2.828762	82.2	146.782712	916.3	0.053528	0.159247	0.152570
61	1837.0	2011	103878.600000	1480646.0	109.706	113.338	1251.7	1828.6	NaN	NaN	...	2.818085	2.776222	2.669824	2.768561	86.6	170.157428	1056.1	NaN	NaN	NaN

Problem Solving 04¶

Replication of a paper published by Kevin L. Kliesen and John A. Tatom - "U.S. Manufacturing and the Importance of International Trade: It’s Not What You Think"¶

The original paper, published in the Federal Reserve Bank of St. Louis Review in 2013, can be downloaded from here, and the dataset can be downloaded from here.¶

Part 1:¶

Part 2:¶

Part 3:¶

	year	ExptoChina	ExptoWorld	MANValAdded	MANPriceIndex	GDPPriceIndex	GoodsEXP2005	GoodsIMP2005	USGDPReal	MANRealInd	DollarNom	DollarReal	OilNom	FuelReal	EquipSofInvR	EXPReal	IMPReal
0	1950	NaN	NaN	79.4	31.927	14.628	39.0	49.7	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	1951	NaN	NaN	94.7	34.208	15.635	47.5	48.7	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	1952	NaN	NaN	98.4	34.382	15.976	45.1	49.7	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	1953	NaN	NaN	107.5	35.086	16.178	42.0	52.6	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	1954	NaN	NaN	101.7	35.664	16.342	44.4	48.5	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

Problem Solving 04¶

Replication of a paper published by Kevin L. Kliesen and John A. Tatom - "U.S. Manufacturing and the Importance of International Trade: It’s Not What You Think"¶

The original paper, published in the Federal Reserve Bank of St. Louis Review in 2013, can be downloaded from here, and the dataset can be downloaded from here.¶

Part 1:¶

Part 2:¶

Part 3:¶

The original paper, published in the Federal Reserve Bank of St. Louis Review in 2013, can be downloaded from here, and the dataset can be downloaded from here.¶